#dataframe <- read_excel("~/Research/path/to/folder/file.xlsx")
#View(dataframe)ggplot_quarto
Importing Data
From excel
From csv/tsv
Use read.table function: first argument specify path to file, sep argument tells R what separates the columns of your data (space, comma, or tab). Example below is for tab separated TSV. header = T (header = True) means the first row is the headers for columns.
RGFP_input_tagLen <- read.table("~/Research/LH_57_csRNA/Hela_sync_RGFP_input/tagLengthDistribution.txt",
sep="\t", header = T)Hiding Code
You can embed plots without showing code:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
Data cleanup
Remove a pattern from a column name
Below example replaces “synchela” with “_”
#dataframe %>% rename_with(~ gsub("_synchela_", "_", .x), contains("_synchela"))TidyR
gather() (outdated, use pivot_longer now!)
Convention: Gather(new independent var, new dependent var, column1, column2, column3, columnN)
- puts column names in new independent var
- values in new dependent var
LH51_hg38only_phredtrim_mapq_tidy <- LH51_phredtrim_hg38_MAPQscores %>% gather(Sample, Counts, a_count, b_count, c_count, d_count, q_count, r_count, s_count, t_count)pivot_longer()
counts_megapeaks_hg38_LH56_tidy <- counts_megapeaks_hg38_LH56_clean %>%
select(PeakID, contains("Tag")) %>%
pivot_longer(
cols = !PeakID, # pivot all cols except PeakID
names_to = "Sample", # sample names become factor col
values_to = "Peak_Tags") # all values in new colBase Plots
Line
Plot_line <- ggplot(data=dataframe, aes(x=xvariable)) + geom_line(aes(y=yvariable1, color= ”name/hexcode”)), + geom_line(aes(y=yvariable2, color= “name/hexcode”)), + geom_line(etc.) + labs (x = “X Axis Label”, y = “Y Axis Label”, title = “Plot title”)
ggplot(mtcars, aes(wt, mpg, group = cyl)) +
geom_line(aes(color = cyl)) +
labs(title = "Miles per Gallon vs Car Weight, Colored by # of Cylinders")Histogram
density_hist <- ggplot(data=iris, aes(x=Sepal.Width, fill=Species))
density_hist +
geom_density(stat="density", alpha=I(0.2)) +
xlab("Sepal Width") +
ylab("Density") +
ggtitle("Histogram & Density Curve of Sepal Width")Pie
input_presence <- data.frame(input_status = c("sufficient inputs", "some inputs missing", "no inputs present"), paper_count = c(27, 12, 14))library(scales)
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
input_piechart <- ggplot(input_presence, aes(x="", y= paper_count, fill=input_status)) +
geom_bar(stat="identity", width=1, alpha = 0.9) +
coord_polar("y", start=0) +
theme_void() +
geom_text(aes(y = paper_count/3 + c(0, cumsum(paper_count)[-length(paper_count)]),
label = paper_count), size=5) +
scale_fill_manual(values = c("firebrick3", "darkorange2","goldenrod1"), name = "Status of inputs") +
labs(title = "Status of Non-IP Input Controls in ChIP-seq Spike-in Datasets",
subtitle = "n = 53")
input_piechartScatterplot
ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point()Remove outline of point (for scatterplots with many overlapping points)
ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point(stroke = NA, alpha = 0.3)NOTE: for scatterplots the title in labs() must come first and be lower case
Density Scatterplots
Make some data
# Data
a <- data.frame(x=rnorm(20000, 10, 1.9), y=rnorm(20000, 10, 1.2) )
b <- data.frame(x=rnorm(20000, 14.5, 1.9), y=rnorm(20000, 14.5, 1.9) )
c <- data.frame(x=rnorm(20000, 9.5, 1.9), y=rnorm(20000, 15.5, 1.9) )
data <- rbind(a,b,c)
# Basic scatterplot
ggplot(data, aes(x=x, y=y) ) +
geom_point()In the above graph, where is the highest density of points?
Density plots - prevent overplotting Instead we should plot a 2D histogram
Default:
# 2d histogram with default option
ggplot(data, aes(x=x, y=y) ) +
geom_bin2d() +
theme_bw()Make it pretty:
# Bin size control + color palette
ggplot(data, aes(x=x, y=y) ) +
geom_bin2d(bins = 70) +
scale_fill_continuous(type = "viridis") +
theme_bw()2D Density plot with Hex bins
ggplot(data, aes(x=x, y=y)) +
geom_hex() +
theme_bw()Warning: Computation failed in `stat_binhex()`.
Caused by error in `compute_group()`:
! The package "hexbin" is required for `stat_bin_hex()`.
# Bin size control + color palette
ggplot(data, aes(x=x, y=y)) +
geom_hex(bins = 70) +
scale_fill_continuous(type = "viridis")Warning: Computation failed in `stat_binhex()`.
Caused by error in `compute_group()`:
! The package "hexbin" is required for `stat_bin_hex()`.
2d distribution with geom_density_2d or stat_density_2d
Contour only
ggplot(data, aes(x=x, y=y) ) +
geom_density_2d()Area only
ggplot(data, aes(x=x, y=y) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon")Warning: The dot-dot notation (`..level..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(level)` instead.
Area + Contour
ggplot(data, aes(x=x, y=y) ) +
stat_density_2d(aes(fill = ..level..),
geom = "polygon", colour="white")Using Raster
ggplot(data, aes(x=x, y=y) ) +
stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(
legend.position='none'
)Scatterplot + Smoothing Line
ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point() +
geom_smooth() +
labs(title = "Price of Diamonds by Cut and Clarity")`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Add Linear model
ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point(alpha = 0.1) +
geom_smooth(method = "lm") +
labs(title = "Price of Diamonds by Cut and Clarity")`geom_smooth()` using formula = 'y ~ x'
Text Scatter Plot
# Base layer
plt_mpg_vs_wt <- ggplot(mtcars, aes(wt, mpg))
# Use text layer and map cyl to label
plt_mpg_vs_wt +
geom_text(aes(label = mtcars$cyl)) +
labs(title = "MPG vs weight for Cars")Warning: Use of `mtcars$cyl` is discouraged.
ℹ Use `cyl` instead.
Above plot is the weight of the car vs the miles per gallon, with each “point” being labeled with the number of cylinders the car has (a categorical variable, values either 4, 6 or 8)
Label points with text of row names:
ggplot(mtcars, aes(wt, mpg, color = cyl)) +
geom_text(label = rownames(mtcars), color = "red")Barplots - Stacked
ggplot(iris, aes(Sepal.Length, fill = Species)) +
geom_bar() +
labs(x = "Species", y = "Sepal Length")Manual Color Scheme and Labeling
palette <- c(setosa = "#387EB8", versicolor = "#D11A1C", virginica = "#314EB9")
ggplot(iris, aes(Sepal.Length, fill = Species)) +
geom_bar() +
labs(x = "Species", y = "Count") +
# Set the fill color scale
scale_fill_manual("Transmission", values = palette)Barplots - side by side
# Set the position
ggplot(iris, aes(Sepal.Length, fill = Species)) +
geom_bar(position = "dodge") +
labs(x = "Species", y = "Count") +
scale_fill_manual("Species", values = palette) Violin Plot
ggplot(data = diamonds, aes(x=cut, y=price, fill = cut)) +
geom_violin()Dotplot (2 category variables)
setwd("~/Research/R_for_fun/data_vis")
species <- read_csv("national_parks_biodiv/species.csv")
species_clean <- species %>%
filter(Occurrence == "Present") %>%
rename_with(~ gsub(" ", "_", .x), contains(" "))
# removed spaces in variable names
species_clean$Park_Name <- gsub(" National Park", "",
as.character(species_clean$Park_Name))
# removed "National Park" from Park_Name
acadia <- filter(species_clean, Park_Name == "Acadia")
ggplot(data = acadia) +
aes(x = Category, y = Abundance, color = Abundance) +
geom_count() +
labs(title = "Abundance of Biodiversity in \nAcadia National Park")
## linebreak in titleRidgeline
library(ggridges)In most cases you won’t need a legend here since each row of the ridgeline plot is clearly visible. Repress the legend with theme(legend.position = "none")
ggplot(diamonds) +
aes(x = price, y = clarity, fill = clarity) +
geom_density_ridges() +
theme(legend.position = "none")Picking joint bandwidth of 403
Can also make a histogram:
ggplot(diamonds) +
aes(x = price, y = clarity, fill = clarity) +
geom_density_ridges(alpha = 0.6, stat = "binline", bins = 50) +
theme(legend.position = "none")You can also map the color fill to a numeric variable (for instance the price on the x axis). There are two requirements:
- Use
geom_density_ridges_gradient()orgeom_ridgeline_gradient() - Instead of direcly settging the fill to a variable (ex.
fill = pricein the example below) you have to set it to call the variable you have already defined. i.e. if you want fill = price, and x = price, then fill = stat(x).
Also note I used log(price) because otherwise the upper outliers in price warp the scale and no changes in color are noticeable (i.e. purely for illustrative purposes)
ggplot(diamonds) +
aes(x = log(price), y = clarity, fill = stat(x)) +
geom_density_ridges_gradient(scale = 2) +
theme(legend.position = "none") +
scale_fill_distiller("RdYlBu")Warning: `stat(x)` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(x)` instead.
Picking joint bandwidth of 0.144
Marginal Histograms
library(ggExtra)diamonds_scatter <- ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point() +
labs(title = "Price of Diamonds by Cut and Clarity")
ggMarginal(diamonds_scatter, groupColour = TRUE, groupFill = TRUE)Add-ons
Set Properties to Categorical Variable
In the
geom, insideaes()set a property (i.e. alpha, shape, size, etc.) =Categorial Variable
Add
scale_property_manual(values = c(value1, value2,etc), name = "Categorial Variable", labels = c("Category 1", "Category 2"))where “property” is alpha, shape, color, etc.
A big advantage of this method is you can set the name of the legend and the categories within the scale_property_manual, so if the variable name is long/cumbersome you can make the legend easier to read
mtcars$cyl = as.factor(mtcars$cyl)
ggplot(mtcars, aes(wt, mpg, group = cyl)) +
geom_point(aes(alpha = cyl), color = "firebrick", size = 4) +
labs(title = "Miles per Gallon vs Car Weight") +
scale_alpha_manual(values = c(0.2, 0.5, 1), name = "Cylinders", labels = c("4", "6", "8"))Faceting
Subplots based on a categorical variable
ggplot(diamonds, aes(carat, price, color = clarity)) +
geom_point(alpha = 0.5, stroke = NA) +
geom_smooth() + facet_wrap(~ clarity)`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Subplots in columns and rows (split by 2 variables)
# Base plot
insurance <- read_csv("~/Research/R_for_fun/archive/insurance.csv")
smoker_insurance <- ggplot(data = insurance) +
aes(x = smoker, y= charges) +
geom_jitter()
# Graph by sex and region
smoker_insurance +
geom_jitter(aes(color = bmi)) +
facet_grid(cols = vars(region), rows = vars(sex))Reordering subplots:
Reordering group factor levels
data_new<- rep(c("A","B", "C", "D"), each = 5)
col2 <- rep(c("12", "10"), each = 10)
data_new$Values <- col2Warning in data_new$Values <- col2: Coercing LHS to a list
#View(data_new)
data_new$group <- factor(data_new$group,levels = c("B", "A", "C", "D"))
head(data_new)[[1]]
[1] "A"
[[2]]
[1] "A"
[[3]]
[1] "A"
[[4]]
[1] "A"
[[5]]
[1] "A"
[[6]]
[1] "B"
faceting other plot types:
ggplot(diamonds) +
aes(x = log(price), y = clarity) +
geom_density_ridges_gradient(aes(x = log(price), fill = stat(x)), alpha = 0.4) +
scale_fill_distiller(palette = "Spectral") +
facet_wrap(~ color)Picking joint bandwidth of 0.227
Picking joint bandwidth of 0.179
Picking joint bandwidth of 0.199
Picking joint bandwidth of 0.194
Picking joint bandwidth of 0.204
Picking joint bandwidth of 0.234
Picking joint bandwidth of 0.306
Faceting and zooming on subset of plot
Example of a plot that needs zooming:
public_data_submit_revisions <- read.delim("~/Research/spike_commentary/public_data_submit_revisions.tsv")
public_data_submit_revisions$filtered.spike_target <- as.numeric(public_data_submit_revisions$filtered.spike_target)
public_data_submit_revisions <- public_data_submit_revisions %>%
group_by(Author) %>%
drop_na(filtered.spike_target) %>%
mutate(inputvar = (filtered.spike_target)/min(filtered.spike_target)) %>%
mutate(meanvar = mean(inputvar))public_data_submit_revisions <- public_data_submit_revisions %>%
arrange(meanvar) %>%
group_by(Author = factor(Author, levels = unique(Author))) %>%
mutate(group = cur_group_id())public_data_submit_revisions %>%
drop_na(inputvar) %>% group_by(Author) %>%
arrange(meanvar) %>%
ggplot() +
aes(x = as.factor(group), y = inputvar) +
geom_jitter(size = 4, alpha = 0.5, width = 0.2) +
geom_hline(yintercept = 1) +
labs(title = "Variation in input spike-in/target chromatin ratios",
y = "input/target", x = "Dataset") +
theme(legend.position = "none")library(ggforce)
public_data_submit_revisions %>%
drop_na(filtered.spike_target) %>% ggplot() +
aes(x = as.factor(group), y = inputvar) +
geom_jitter(size = 6, alpha = 0.6, width = 0.1) +
geom_hline(yintercept = 1) +
labs(title = "Variation in input spike-in/target chromatin ratios", y = "Relative ratio of inputs within a dataset",
x = "Dataset") +
facet_zoom(ylim = c(0.7, 2.5))Better way: break plot with facets into 1-19, then 20-23, and scale axes
- can get break in rows, while scaling equally
- scales = “free_x” removes the empty entries in each facet
- space = “free” scales each facet by the range of x values, keeps them proportional
library(ggh4x)
library(RColorBrewer)
public_data_submit_revisions %>%
drop_na(inputvar) %>% ggplot() +
aes(x = as.factor(group), y = inputvar) +
geom_jitter(size = 3.5, alpha = 0.6, width = 0.17, color = "navy", pch=21, fill = "dodgerblue2", stroke = 1.5) +
geom_hline(yintercept = 1) +
labs(title = "Variation in input spike-in/target chromatin ratios",
y = "Relative ratio of inputs within a dataset",
x = "Dataset") +
ggh4x::facet_grid2(~ group > 26, scales = "free", independent = "y", space = "free") +
scale_color_manual(
values = colorRampPalette(brewer.pal(9, "YlGnBu"))(13)[1:13])Positioning
Scatterplot positioning
Default
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_point()Jitter
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_jitter()Or
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_point(position= "jitter")Legend positioning
Use legend.position = c([HORIZONTAL], [VERTICAL]) where
- [HORIZONTAL] = integer from (0, 1), 0 left, 1 right
- [VERTICAL] = integer from (0, 1), 0 bottom, 1 top
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_point() +
coord_cartesian(ylim = c(2,5)) +
theme(legend.position = c(0.85, 0.83))Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
Add border to legend if inside graph to reduce chance of misinterpretation:
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_point() +
coord_cartesian(ylim = c(2,5)) +
theme(legend.position = c(0.85, 0.83),
legend.background = element_rect(
linewidth=0.7, linetype="solid",
colour ="grey20"))Plot on log scale
Plot_type + scale_x_log10() + scale_y_log10()
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_point(position= "jitter") +
scale_x_log10() + scale_y_log10()Set x and y axes
Strategy 1: Scale_x_continuous
Remove points outside the viewing window (will affect regression lines, etc)
plot + scale_x_continuous(limits = c(xmin, xmax)) + scale_y_continuous(limits = c(ymin, ymax))
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, group = Species, color = Species) +
geom_point(position= "jitter") +
scale_x_continuous(limits = c(4, 7)) +
scale_y_continuous(limits = c(1, 5))Warning: Removed 13 rows containing missing values or values outside the scale range
(`geom_point()`).
Strategy 2: coord_cartesian
Set viewing window without removing points that may fall outside (they just won’t be visible)
plot + coord_cartesian(xlim=c(xmin, xmax), ylim=c(ymin, ymax))
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, group = Species, color = Species) +
geom_point(position= "jitter") +
coord_cartesian(xlim = c(4, 7), ylim = c(1, 5))Add regression line
Plot_object + geom_smooth(method=lm)
For multiple regression lines by group, set group = Species within aes()
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, group = Species, color = Species) +
geom_point(position= "jitter") +
geom_smooth(method = lm, fill = NA)`geom_smooth()` using formula = 'y ~ x'
Add x = y line
Plot_object + geom_abline()
Manual tick lines
plot_object + scale_y_continuous(breaks=seq(ymin, ymax, interval))
Coloring
Viridis Colorblind-friendly library
Add with library(viridis)
library(viridis)Loading required package: viridisLite
Attaching package: 'viridis'
The following object is masked from 'package:scales':
viridis_pal
Continuous Color Schemes
Plotting for continuous variables
smoker_insurance +
geom_jitter(aes(color = bmi)) +
facet_grid(cols = vars(region), rows = vars(sex)) +
scale_color_viridis(option = "magma")Discrete Color Schemes
knitr::include_graphics("C:\\Users\\lhodg\\Documents\\Research\\R_for_fun\\data_vis\\viridis_discrete.png")Plotting for Discrete color schemes, make sure to set discrete = TRUE
ggplot(data = acadia) +
aes(x = Category, y = Abundance, color = Abundance) +
geom_count() +
labs(title = "Abundance of Biodiversity in Acadia National Park") +
scale_color_viridis(option = "magma", discrete = TRUE)R Color Brewer
Get hex codes from the color schemes:
brewer.pal(n=5,"Set1")[1] "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3" "#FF7F00"
Extend colors (if you need more colors, or want to exclude upper/lower colors)
colorRampPalette(brewer.pal(9, "YlGnBu"))(12)[4:11][1] "#B9E3B5" "#85CFBA" "#57BEC0" "#33A8C2" "#1D8CBD" "#2167AC" "#23479D"
[8] "#1D2D83"
ggplot(data = acadia) +
aes(x = Category, y = Abundance, color = Abundance) +
geom_count() +
labs(title = "Abundance of Biodiversity in Acadia National Park") +
scale_color_manual(
values = colorRampPalette(brewer.pal(9, "YlGnBu"))(10)[3:9],
name = "Abundance")non Rcolorbrewer palettes
library(grDevices) # library for color palettes
library(unikn) # library needed for plot belowWelcome to unikn (v1.0.0)!
simcol() searches for similar colors.
n <- 10
h1 <- hcl.colors(n, palette = "Dynamic")
h2 <- hcl.colors(n, palette = "Earth")
h3 <- hcl.colors(n, palette = "Terrain")
h4 <- hcl.colors(n, palette = "Berlin")
h5 <- hcl.colors(n, palette = "Oslo")
h6 <- hcl.colors(n, palette = "Lajolla")
h7 <- hcl.colors(n, palette = "Cork")
h8 <- hcl.colors(n, palette = "Vik")
h9 <- hcl.colors(n, palette = "Fall")
h10 <- hcl.colors(n, palette = "Sunset")
h11 <- hcl.colors(n, palette = "Purple-Orange")
h12 <- hcl.colors(n, palette = "Dark-Mint")
h13 <- hcl.colors(n, palette = "Peach")
h15 <- hcl.colors(n, palette = "Roma")
seecol(list(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, h15),
col_brd = "white", lwd_brd = 4,
title = "hcl.colors NOT in RColorBrewer (n = 10)",
pal_names = c("Dynamic", "Earth", "Terrain", "Berlin", "Oslo", "Lajolla", "Cork", "Vik", "Fall", "Sunset", "Purple-Orange", "Dark-Mint", "Peach", "Roma"))See all hcl palettes:
hcl.pals() [1] "Pastel 1" "Dark 2" "Dark 3" "Set 2"
[5] "Set 3" "Warm" "Cold" "Harmonic"
[9] "Dynamic" "Grays" "Light Grays" "Blues 2"
[13] "Blues 3" "Purples 2" "Purples 3" "Reds 2"
[17] "Reds 3" "Greens 2" "Greens 3" "Oslo"
[21] "Purple-Blue" "Red-Purple" "Red-Blue" "Purple-Orange"
[25] "Purple-Yellow" "Blue-Yellow" "Green-Yellow" "Red-Yellow"
[29] "Heat" "Heat 2" "Terrain" "Terrain 2"
[33] "Viridis" "Plasma" "Inferno" "Rocket"
[37] "Mako" "Dark Mint" "Mint" "BluGrn"
[41] "Teal" "TealGrn" "Emrld" "BluYl"
[45] "ag_GrnYl" "Peach" "PinkYl" "Burg"
[49] "BurgYl" "RedOr" "OrYel" "Purp"
[53] "PurpOr" "Sunset" "Magenta" "SunsetDark"
[57] "ag_Sunset" "BrwnYl" "YlOrRd" "YlOrBr"
[61] "OrRd" "Oranges" "YlGn" "YlGnBu"
[65] "Reds" "RdPu" "PuRd" "Purples"
[69] "PuBuGn" "PuBu" "Greens" "BuGn"
[73] "GnBu" "BuPu" "Blues" "Lajolla"
[77] "Turku" "Hawaii" "Batlow" "Blue-Red"
[81] "Blue-Red 2" "Blue-Red 3" "Red-Green" "Purple-Green"
[85] "Purple-Brown" "Green-Brown" "Blue-Yellow 2" "Blue-Yellow 3"
[89] "Green-Orange" "Cyan-Magenta" "Tropic" "Broc"
[93] "Cork" "Vik" "Berlin" "Lisbon"
[97] "Tofino" "ArmyRose" "Earth" "Fall"
[101] "Geyser" "TealRose" "Temps" "PuOr"
[105] "RdBu" "RdGy" "PiYG" "PRGn"
[109] "BrBG" "RdYlBu" "RdYlGn" "Spectral"
[113] "Zissou 1" "Cividis" "Roma"
hcl.swatch <- function(type = NULL, n = 5, nrow = 11,
border = if (n < 15) "black" else NA) {
palette <- hcl.pals(type)
cols <- sapply(palette, hcl.colors, n = n)
ncol <- ncol(cols)
nswatch <- min(ncol, nrow)
par(mar = rep(0.1, 4),
mfrow = c(1, min(5, ceiling(ncol/nrow))),
pin = c(1, 0.5 * nswatch),
cex = 0.7)
while (length(palette)) {
subset <- 1:min(nrow, ncol(cols))
plot.new()
plot.window(c(0, n), c(0, nrow + 1))
text(0, rev(subset) + 0.1, palette[subset], adj = c(0, 0))
y <- rep(subset, each = n)
rect(rep(0:(n-1), n), rev(y), rep(1:n, n), rev(y) - 0.5,
col = cols[, subset], border = border)
palette <- palette[-subset]
cols <- cols[, -subset, drop = FALSE]
}
par(mfrow = c(1, 1), mar = c(5.1, 4.1, 4.1, 2.1), cex = 1)
}
hcl.swatch("qualitative")hcl.swatch("sequential")hcl.swatch("diverging")hcl.swatch("divergingx")Manual Color Scheme and Labeling
# Define your palette of choice
iris_palette <- c(setosa = "slateblue", versicolor = "seagreen", virginica = "orchid")
# Plot using your palette, the first argument in scale_fill_manual is the legend title
ggplot(iris, aes(Petal.Width, fill = Species)) +
geom_bar() +
labs(x = "Species", y = "Count") +
scale_fill_manual("Species", values = iris_palette)Labels
All Font Sizes Any ## indicates numerical input, XX indicates text input
Plot_object + theme(axis.text.x = element_text(size=##), axis.text.y = element_text(size=##), axis.title = element_text(size=##), etc.)
Options for text size changing:
- axis.text.x
- axis.text.y
- strip.text.x : text in facet labels
custom legend labels
For line/dot plots: scale_color_discrete or scale_color_manual
scale_color_discrete(name = "Legend Title", labels = c("label1", "label2", "label3", "75%","95%", "100%"))
Example
ggplot(iris) +
aes(Sepal.Length, Sepal.Width, group = Species, color = Species) +
geom_point() +
scale_color_manual(
name = "Species",
labels = c("Setosa", "Versicolor", "Virginica"),
values = c("slateblue", "seagreen", "orchid")
)For bar/violin/similar: scale_fill_discrete
rotate labels
Plot_object + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
Format Date Labels
Standard date format: yyyy-mm-dd
Make a time series dataset:
set.seed(1234)
last_month <- Sys.Date() - 0:29
df <- data.frame(
date = last_month,
price = runif(30)
)Plot:
ggplot(data=df, aes(x = date, y = price)) +
geom_line()Format dates:
- Weekday name: use %a and %A for abbreviated and full weekday name, respectively
- Month name: use %b and %B for abbreviated and full month name, respectively
- %d: day of the month as decimal number
- %U: week of the year as decimal number (00-53)
- %Y: Year with century
ggplot(data=df, aes(x = date, y = price)) +
geom_line() +
scale_x_date(date_labels = "%b/%d")ggplot(data=df, aes(x = date, y = price)) +
geom_line() + scale_x_date(date_labels = "%U")format axes numbers
sci notation -> full numbers
options(scipen = 999)
library(gapminder)
ggplot(data = gapminder %>% filter(year == 2007) %>% arrange(-pop) %>% top_n(10, wt = pop)) +
aes(x = country, y = pop) +
geom_bar(stat = "identity")full numbers -> Add commas
scale_y_continuous(labels = scales::comma)
ggplot(data = gapminder %>% filter(year == 2007) %>% arrange(-pop) %>% top_n(10, wt = pop)) +
aes(x = country, y = pop) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::comma)Full numbers -> Millions or Billions
labels = scales::label_number_si()
ggplot(data = gapminder %>% filter(year == 2007) %>% arrange(-pop) %>% top_n(10, wt = pop)) +
aes(x = country, y = pop) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = scales::label_number(scale_cut == cut_number()))Linebreak in plot labels
title = "text here \ntext here"
acadia <- filter(species_clean, Park_Name == "Acadia")
ggplot(data = acadia) +
aes(x = Category, y = Abundance, color = Abundance) +
geom_count() +
labs(title = "Abundance of Biodiversity in \nAcadia National Park")Combining plots
GridExtra - Gridarrange
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
Some dummy plots
p1 <- ggplot(data=iris) +
aes(x=Sepal.Width, fill=Species) +
geom_density(stat="density", alpha=I(0.2)) +
xlab("Sepal Width") + ylab("Density") +
ggtitle("Histogram & Density Curve of Sepal Width")
p2 <- ggplot(data, aes(x=x, y=y) ) +
geom_bin2d(bins = 70) +
scale_fill_continuous(type = "viridis") +
theme_bw()
p3 <- plt_mpg_vs_wt +
geom_text(aes(label = mtcars$cyl)) +
labs(title = "MPG vs weight for Cars")
p4 <- ggplot(data = diamonds) +
aes(x=cut, y=price, fill = cut) +
geom_violin() +
theme(legend.position = "none")
p5 <- ggplot(iris) +
aes(Sepal.Length, Sepal.Width, color = Species) +
geom_jitter()Combine with grid.arrange
# make a matrix of the layout - NA's are empty placeholders, each vector makes a row
lay <- rbind(c(1, NA, NA),
c(2, 3, NA),
c(4, 5, NA))
# gridarrange takes the plots (in order of the layout matrix) then the layout matrix
# can also set custom widths/heights for each row if you want
grid.arrange(p1, p2, p3, p4, p5,
layout_matrix = lay,
widths = c(5,6,5))